
# K = 10

# Clear Memory
rm( list=ls() )

# Set your working directory
getwd ()
setwd("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN")

library(readtext)
library(quanteda)
library(ggplot2)
library(stm)
library(igraph)
library(RColorBrewer)
library(wordcloud)
library(slam)
library(lda)
library (stopwords)
library("quanteda.textplots")
library("extrafont")

font_import(paths = "/Users/yongjaekim/Library/Fonts")
y

extrafont::loadfonts()


dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN/*.txt")
summary(dat_china3)

### multiple text files with docvars from filenames

dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN/*.txt", docvarsfrom = "filenames", dvsep = "-", docvarnames = c("Year", "Time", "Number", "CCP"))
summary(corpus(dat_china3), 486)

### Building a corpus 
dat_corpus_china3 <- corpus (dat_china3)
summary(dat_corpus_china3, 486)


# Make DFM https://quanteda.io/articles/pkgdown/examples/chinese.html
quant_dfm <-dfm(dat_corpus_china3, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, remove = stopwords(language = "zh", source = "misc"))
quant.dfm.trim <-dfm_trim(quant_dfm, min_docfreq = 0.10, max_docfreq = 0.95, docfreq_type = "prop") #min 1 %/max 95%
quant.dfm.trim

##### K = 10
###### (!) Top Topics (Plots)
################ That is better
#### K = 10
par(mfrow=c(1,1))

######? font and color?
font = if (Sys.info() ['sysname'] == "Darwin") "SimHei" else NULL
color = RColorBrewer::brewer.pal(8, "Dark2")
######

set.seed(100)
if (require("stm")) {
  my_fit20 <- stm(quant.dfm.trim, K = 10, verbose = FALSE)
  plot(my_fit20)
}

topic.count <- 10
dfm2stm <- convert(quant.dfm.trim, to = "stm")

## Top words of each topic
model.stm <- stm(
  dfm2stm$documents,
  dfm2stm$vocab,
  K = topic.count,
  data = dfm2stm$meta,
  init.type = "Spectral"
)

as.data.frame(t(labelTopics(model.stm, n = 10)$prob))

labelTopics(model.stm, n = 30)

##
labelTopics(model.stm)

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")


####### Expected Topic Proportions, K = 10 https://globaldataversecommunityconsortium.github.io/dataverse-previewers/previewers/TextPreview.html?fileid=3142691&siteUrl=https://dataverse.harvard.edu&datasetid=3142685&datasetversion=1.0&locale=en Sung EUN Kim (2018)'s paper

plot(model.stm, type = "summary", xlim=c(0,.5), labeltype="prob", text.cex = (0.85), topics=c(1,2,3,4,5,6,7,8,9,10),
     topic.names=c("Topic 1: Consistent Development", "Topic 2: List of Party Elites", "Topic 3: Socialist Movement", "Topic 4: Command Economy", "Topic 5: Party Announcement",
                   "Topic 6: Political Discipline", "Topic 7: Political and Economic Reform", "Topic 8: Economic Development", "Topic 9: Maoism", "Topic 10: Communist Party"), family="Hei",
     main ="Topics and High Probability Words", 
     custom.labels=c( "全面, 现代, 推进 (comprehensive, modern, advance)",
                      "李, 王, 张 (Li, Wang, Zhang)", 
                      "革命,运动, 阶级 (revolution, movement, class)",
                      "经济, 计划, 发展 (economy, plan, development)",
                      "中央, 政治局, 会议 (central, politiburo, meeting)",
                      "工作, 监督, 纪律 (work, supervision, discipline)",
                      "改革, 路线, 邓小平 (reform, course, Deng Xiaoping)",
                      "经济, 改革, 市场 (economy, reform, market)",   
                      "领导, 毛泽东, 思想 (lead, Mao Zedong, thought)", 
                      "共产, 章程, 组织 (communism, charter, organization)"))

### updated: https://stackoverflow.com/questions/53090639/extracting-original-text-from-quanteda-dfm-for-use-in-stm
str (dfm2stm)

thoughts1 <- findThoughts(model.stm, texts = dfm2stm$meta$dat_corpus_china3, n = 30, topics = 7)

thoughts1
head(thoughts1)

### plotQuote(thoughts1, width=40, main="Topic 7")
plotQuote(thoughts1, width=40, main="Topic 7")


## Index: https://quanteda.io/reference/index-topic.html : Chinese Text Analysis
toks <- tokens(dat_corpus_china3[1:486])
toks

## Locate keywords-in-context: https://quanteda.io/reference/kwic.html
######

# single token matching
toks <- tokens(dat_corpus_china3[1:486])
kwic(toks, pattern = "改革*", valuetype = "glob", window = 3)

kwic(toks, pattern = "改革", valuetype = "regex", window = 3)

kwic(toks, pattern = "改革", valuetype = "fixed", window = 3)

# phrase matching
kwic(toks, pattern = phrase("邓小平* 改革"), window = 2)

kwic(toks, pattern = phrase("邓小平 改革"), valuetype = "regex", window = 2)

# use index: could not find function "index"?
idx <- index(toks, phrase("邓小平* 改革"))
as.data.frame(kw)

kwic(toks, index = idx, window = 2)

#
kw <- kwic(tokens(dat_corpus_china3[1:20]), "改革*")
is.kwic(kw)

is.kwic("Not a kwic")

is.kwic(kw[, c("pre", "post")])

kw <- kwic(toks, pattern = "改革*", valuetype = "glob", window = 3)
as.data.frame(kw)

kw <- kwic(toks, pattern = "周恩来*", valuetype = "glob", window = 3)
as.data.frame(kw)


##
options(max.print=100000)


#### findThoughts 
findThoughts(model.stm, texts = dat_corpus_china3, n = 3, topics = 1)
findThoughts(model.stm, texts = dat_corpus_china3, n = 3, topics = 2)
findThoughts(model.stm, texts = dat_corpus_china3, n = 3, topics = 3)
findThoughts(model.stm, texts = dat_corpus_china3, n = 5, topics = 4)
findThoughts(model.stm, texts = dat_corpus_china3, n = 2, topics = 5)

findThoughts(model.stm, texts = dat_corpus_china3, n = 10, topics = 6)
findThoughts(model.stm, texts = dat_corpus_china3, n = 18, topics = 7)
findThoughts(model.stm, texts = dat_corpus_china3, n = 38, topics = 8)
findThoughts(model.stm, texts = dat_corpus_china3, n = 7, topics = 9)
findThoughts(model.stm, texts = dat_corpus_china3, n = 11, topics = 10)

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

###! Same Y-axis: https://milesdwilliams15.github.io/Better-Graphics-for-the-stm-Package-in-R/; ylim=c()
### K = 10
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.5), 
       main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  abline(v=1977, lty=2, lwd=1)
  text(1977,0.3, "1977", pos = 2, srt = 90)
  abline(v=1989, lty="dashed")
  text(1992,0.3, "1989", pos = 2, srt = 90)
  legend(x=1940, y=1.5, legend=c(i), bty ="n", xpd = T)
}

###### (2) c(3,3)*
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.5), main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  title(main = "")
  abline(v=1989, lty="dashed")
  text(1989,0.3, "1989", pos = 2, srt = 90)
  abline(v=1977, lty="dashed")
  text(1977,0.3, "1977", pos = 2, srt = 90)
  legend(x=1977, y=0.7, legend=c(i), bty ="n", xpd = T)
}


# *marginal effects 3: https://uclspp.github.io/PUBLG088/stm.html 
topics_of_interest <- c(1, 7, 9)
topic_labels <- c("Consistent Development", "Political and Econoic Reform", "Maoism")

stm_effects <-estimateEffect(topics_of_interest ~ Number + s(Year),
                             stmobj = model.stm, 
                             meta = dfm2stm$meta,
                             uncertainty = "Global")
summary(stm_effects)


## *marginal effects 2 # method=continuous without labeltype
par(mfrow=c(1,1))
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     custom.labels = topic_labels, 
     main = "Marginal Effects of Topic", 
     xlab = "Figure 1",
     xlim=c(1920, 2020),
     ylim=c(-.04,.4))
abline(v=1956, lty="dashed")
text(1956,0.3, "1956", pos = 2, srt = 90)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)

## Method=continous with labeltype (Same graph) 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", ## put this
     custom.labels = topic_labels)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

# 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", 
     custom.labels = topic_labels)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

## Correlation structure
corr = topicCorr(model.stm)
plot(corr)


################################################
## No. of Topics (K = 40)

# K = 40

# Clear Memory
rm( list=ls() )

# Set your working directory
getwd ()
setwd("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN")

library(readtext)
library(quanteda)
library(ggplot2)
library(stm)
library(igraph)
library(wordcloud)
library(slam)
library(lda)
library (stopwords)
library("quanteda.textplots")
library("extrafont")

font_import(paths = "/Users/yongjae/Library/Fonts")
y

extrafont::loadfonts()


dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN/*.txt")
summary(dat_china3)

### multiple text files with docvars from filenames

dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN/*.txt", docvarsfrom = "filenames", dvsep = "-", docvarnames = c("Year", "Time", "Number", "CCP"))
summary(corpus(dat_china3), 486)

### Building a corpus 
dat_corpus_china3 <- corpus (dat_china3)
summary(dat_corpus_china3, 486)

# Make DFM https://quanteda.io/articles/pkgdown/examples/chinese.html
quant_dfm <-dfm(dat_corpus_china3, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, remove = stopwords(language = "zh", source = "misc"))
quant.dfm.trim <-dfm_trim(quant_dfm, min_docfreq = 0.10, max_docfreq = 0.95, docfreq_type = "prop") #min 1 %/max 95%
quant.dfm.trim

##### K = 40
###### (!) Top Topics (Plots)
################ That is better
#### K = 40
par(mfrow=c(1,1))

######? font and color?
font = if (Sys.info() ['sysname'] == "Darwin") "SimHei" else NULL
color = RColorBrewer::brewer.pal(8, "Dark2")
######

set.seed(100)
if (require("stm")) {
  my_fit20 <- stm(quant.dfm.trim, K = 40, verbose = FALSE)
  plot(my_fit20)
}

topic.count <- 40
dfm2stm <- convert(quant.dfm.trim, to = "stm")

## Top words of each topic
model.stm <- stm(
  dfm2stm$documents,
  dfm2stm$vocab,
  K = topic.count,
  data = dfm2stm$meta,
  init.type = "Spectral"
)

as.data.frame(t(labelTopics(model.stm, n = 10)$prob))

labelTopics(model.stm, n = 10)

##
labelTopics(model.stm)

#######################################
####### Estimations of 40 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

###! Same Y-axis: https://milesdwilliams15.github.io/Better-Graphics-for-the-stm-Package-in-R/; ylim=c()
### K = 40
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 40)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.5), 
       main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  abline(v=1977, lty=2, lwd=1)
  text(1977,0.3, "1977", pos = 2, srt = 90)
  abline(v=1989, lty="dashed")
  text(1992,0.3, "1989", pos = 2, srt = 90)
  legend(x=1940, y=1.5, legend=c(i), bty ="n", xpd = T)
}

###### (2) c(3,3)*
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 40)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.4), main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  title(main = "")
  abline(v=1989, lty="dashed")
  text(1989,0.3, "1989", pos = 2, srt = 90)
  abline(v=1977, lty="dashed")
  text(1977,0.3, "1977", pos = 2, srt = 90)
  legend(x=1977, y=0.7, legend=c(i), bty ="n", xpd = T)
}

## Correlation structure
corr = topicCorr(model.stm)
plot(corr)


********
######################################################
##### Different Corpus (Party Documents) #############

# K = 10

# Clear Memory
rm( list=ls() )

# Set your working directory
getwd ()
setwd("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials")

library(readtext)
library(quanteda)
library(ggplot2)
library(stm)
library(igraph)
library(wordcloud)
library(slam)
library(lda)
library (stopwords)
library("quanteda.textplots")
library("extrafont")

font_import(paths = "/Users/yongjae/Library/Fonts")
y

extrafont::loadfonts()


dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials/*.txt")
summary(dat_china3)

### multiple text files with docvars from filenames

dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials/*.txt", docvarsfrom = "filenames", dvsep = "-", docvarnames = c("Year", "Time", "Number", "CCP"))
summary(corpus(dat_china3), 444)

### Building a corpus 
dat_corpus_china3 <- corpus (dat_china3)
summary(dat_corpus_china3, 444)

# Make DFM https://quanteda.io/articles/pkgdown/examples/chinese.html
quant_dfm <-dfm(dat_corpus_china3, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, remove = stopwords(language = "zh", source = "misc"))
quant.dfm.trim <-dfm_trim(quant_dfm, min_docfreq = 0.10, max_docfreq = 0.95, docfreq_type = "prop") #min 1 %/max 95%
quant.dfm.trim

##### K = 10
###### (!) Top Topics (Plots)
################ That is better
#### K = 10
par(mfrow=c(1,1))

######? font and color?
font = if (Sys.info() ['sysname'] == "Darwin") "SimHei" else NULL
color = RColorBrewer::brewer.pal(8, "Dark2")
######

set.seed(100)
if (require("stm")) {
  my_fit20 <- stm(quant.dfm.trim, K = 10, verbose = FALSE)
  plot(my_fit20)
}

topic.count <- 10
dfm2stm <- convert(quant.dfm.trim, to = "stm")

## Top words of each topic
model.stm <- stm(
  dfm2stm$documents,
  dfm2stm$vocab,
  K = topic.count,
  data = dfm2stm$meta,
  init.type = "Spectral"
)

as.data.frame(t(labelTopics(model.stm, n = 30)$prob))

labelTopics(model.stm, n = 30)

##
labelTopics(model.stm)

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

###! Same Y-axis: https://milesdwilliams15.github.io/Better-Graphics-for-the-stm-Package-in-R/; ylim=c()
### K = 10
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.5), 
       main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  abline(v=1977, lty=2, lwd=1)
  text(1977,0.3, "1977", pos = 2, srt = 90)
  abline(v=1989, lty="dashed")
  text(1992,0.3, "1989", pos = 2, srt = 90)
  legend(x=1940, y=1.5, legend=c(i), bty ="n", xpd = T)
}

###### (2) c(3,3)*
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.04,.4), main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  title(main = "")
  abline(v=1989, lty="dashed")
  text(1989,0.3, "1989", pos = 2, srt = 90)
  abline(v=1977, lty="dashed")
  text(1977,0.3, "1977", pos = 2, srt = 90)
  legend(x=1977, y=0.7, legend=c(i), bty ="n", xpd = T)
}

# *marginal effects 3: https://uclspp.github.io/PUBLG088/stm.html 
topics_of_interest <- c(1, 7, 10)
topic_labels <- c("Consistent Development", "Socialist Movement", "Maoism")

stm_effects <-estimateEffect(topics_of_interest ~ Number + s(Year),
                             stmobj = model.stm, 
                             meta = dfm2stm$meta,
                             uncertainty = "Global")
summary(stm_effects)


## *marginal effects 2 # method=continuous without labeltype
par(mfrow=c(1,1))
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     custom.labels = topic_labels, 
     main = "Marginal Effects of Topic", 
     xlab = "Figure 1",
     xlim=c(1920, 2020),
     ylim=c(-.04,.4))
abline(v=1956, lty="dashed")
text(1956,0.3, "1956", pos = 2, srt = 90)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)

## Method=continous with labeltype (Same graph) 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", ## put this
     custom.labels = topic_labels)
abline(v=1956, lty="dashed")
text(1956,0.3, "1956", pos = 2, srt = 90)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

# 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", 
     custom.labels = topic_labels)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

## Correlation structure
corr = topicCorr(model.stm)
plot(corr)


******
########################################
### Different Corpus (Editorials) ######

# K = 10

# Clear Memory
rm( list=ls() )

# Set your working directory
getwd ()
setwd("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials")

library(readtext)
library(quanteda)
library(ggplot2)
library(stm)
library(igraph)
library(wordcloud)
library(slam)
library(lda)
library (stopwords)
library("quanteda.textplots")
library("extrafont")

font_import(paths = "/Users/yongjae/Library/Fonts")
y

extrafont::loadfonts()


dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials/*.txt")
summary(dat_china3)

### multiple text files with docvars from filenames

dat_china3 <-readtext("/Users/yongjaekim/Documents/Papers/CCP Control2/CCP_DOCS_CN_woEditorials/*.txt", docvarsfrom = "filenames", dvsep = "-", docvarnames = c("Year", "Time", "Number", "CCP"))
summary(corpus(dat_china3), 42)

### Building a corpus 
dat_corpus_china3 <- corpus (dat_china3)
summary(dat_corpus_china3, 42)

# Make DFM https://quanteda.io/articles/pkgdown/examples/chinese.html
quant_dfm <-dfm(dat_corpus_china3, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, remove = stopwords(language = "zh", source = "misc"))
quant.dfm.trim <-dfm_trim(quant_dfm, min_docfreq = 0.10, max_docfreq = 0.95, docfreq_type = "prop") #min 1 %/max 95%
quant.dfm.trim

##### K = 10
###### (!) Top Topics (Plots)
################ That is better
#### K = 10
par(mfrow=c(1,1))

######? font and color?
font = if (Sys.info() ['sysname'] == "Darwin") "SimHei" else NULL
color = RColorBrewer::brewer.pal(8, "Dark2")
######

set.seed(100)
if (require("stm")) {
  my_fit20 <- stm(quant.dfm.trim, K = 10, verbose = FALSE)
  plot(my_fit20)
}

topic.count <- 10
dfm2stm <- convert(quant.dfm.trim, to = "stm")

## Top words of each topic
model.stm <- stm(
  dfm2stm$documents,
  dfm2stm$vocab,
  K = topic.count,
  data = dfm2stm$meta,
  init.type = "Spectral"
)

as.data.frame(t(labelTopics(model.stm, n = 30)$prob))

labelTopics(model.stm, n = 30)

##
labelTopics(model.stm)

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

#######################################
####### Estimations of 10 topics
model.stm.labels <- labelTopics(model.stm, 1:topic.count)
dfm2stm$meta$datum <- as.numeric(dfm2stm$meta$Year)
model.stm.ee <- estimateEffect(1:topic.count ~ Number + s(Year), model.stm, meta = dfm2stm$meta, uncertainty = "Global")

###! Same Y-axis: https://milesdwilliams15.github.io/Better-Graphics-for-the-stm-Package-in-R/; ylim=c()
### K = 10
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.3,1.0), 
       main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  abline(v=1977, lty=2, lwd=1)
  text(1977,0.3, "1977", pos = 2, srt = 90)
  abline(v=1989, lty="dashed")
  text(1992,0.3, "1989", pos = 2, srt = 90)
  legend(x=1940, y=1.5, legend=c(i), bty ="n", xpd = T)
}

###### (2) c(3,3)*
par(mfrow=c(3,3))
for (i in seq_along(sample(1:topic.count, size = 10)))
{
  plot(model.stm.ee, "Year", method = "continuous", topics = i, ylim=c(-.3,1.0), main = paste0(model.stm.labels$prob[i,1:3], collapse = ", "), printlegend = F)
  title(main = "")
  abline(v=1989, lty="dashed")
  text(1989,0.3, "1989", pos = 2, srt = 90)
  abline(v=1977, lty="dashed")
  text(1977,0.3, "1977", pos = 2, srt = 90)
  legend(x=1977, y=0.7, legend=c(i), bty ="n", xpd = T)
}

# *marginal effects 3: https://uclspp.github.io/PUBLG088/stm.html 
topics_of_interest <- c(1, 6, 9)
topic_labels <- c("Socialist Movement", "Econoimc Development", "Maoism")

stm_effects <-estimateEffect(topics_of_interest ~ Number + s(Year),
                             stmobj = model.stm, 
                             meta = dfm2stm$meta,
                             uncertainty = "Global")
summary(stm_effects)


## *marginal effects 2 # method=continuous without labeltype
par(mfrow=c(1,1))
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     custom.labels = topic_labels, 
     main = "Marginal Effects of Topic", 
     xlab = "Figure 1",
     xlim=c(1945, 2020),
     ylim=c(-.04,.4))
abline(v=1956, lty="dashed")
text(1956,0.3, "1956", pos = 2, srt = 90)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)

## Method=continous with labeltype (Same graph) 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", ## put this
     xlim=c(1945, 2012),
     ylim=c(-.04,.4),
     custom.labels = topic_labels)
abline(v=1956, lty="dashed")
text(1956,0.3, "1956", pos = 2, srt = 90)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

# 
plot(stm_effects, 
     covariate = "Year",
     topics = topics_of_interest, 
     model = model.stm, 
     method = "continuous", 
     labeltype = "custom", 
     xlim=c(1945, 2012),
     ylim=c(-.04,.4),
     custom.labels = topic_labels)
abline(v=1977, lty="dashed")
text(1977,0.3, "1977", pos = 2, srt = 90)
abline(v=1989, lty="dashed")
text(1989,0.3, "1989", pos = 2, srt = 90)

## Correlation structure
corr = topicCorr(model.stm)
plot(corr)









